####*********************************************************************
##### OUTPUTTING THE SUMMARY STATISTICS IN TABLES AND FIGURES ***********
####*********************************************************************

#### industry decomposition - 2010-2023 ###########
industries <- read_xlsx("data/official_statistics/NAICS 2digit.xlsx") %>% 
  select(industry_code, industry_name = Name) ## NAICS 2-digit industry codes

## reading employment data from BLS
files_path <- paste0(main_dir, "data/official_statistics/ces_industry_level_employment/")
file_list <- list.files(files_path, pattern="*.xlsx", full.names = TRUE)
emp_list <- list()
for (file_path in file_list){
  emp_CES <-  data.frame(read_xlsx(file_path))
  emp_list <- append(emp_list, list(emp_CES))
}

## merging the BLS employment data
emp_CES <- bind_rows(emp_list) %>% 
  clean_names() %>% 
  mutate(industry_code = substr(naics,1,2)) %>%
  rename(n_worker=total_employees) %>%
  mutate(month=sub("M","",period)) %>%
  select(n_worker,industry_code,year,month) %>%
  mutate_all(as.numeric) %>%
  # sum across three digit industries 
  group_by(industry_code, year, month) %>%
  summarise_all(sum)


emp_CES <- emp_CES %>% 
  group_by(year, month) %>% mutate(percent = n_worker/sum(n_worker)*100) %>% 
  group_by(industry_code) %>% summarise(percent = mean(percent)) %>% 
  full_join(industries, by = "industry_code") %>% 
  mutate(percent = ifelse(is.na(percent), 0, percent)) %>% 
  # this step is to sum across industries with same big category (Retail Trade) but different naics (44 and 45)
  group_by(industry_name) %>% summarise(percent = sum(percent)) 

rm(emp_list, file_list)


## reading the industry statistics obtained from payroll_provider data
emp_payroll_provider <- readRDS(paste0(project_dir, "data/tmp_data/stat_industry.rds")) %>% 
  mutate_all(as.numeric) %>%
  group_by(year, clt_industry_code) %>% 
  summarise(n = sum(n)) %>%  ungroup()

# Find percentage of workers by industry (conditional of worker having an industry)
emp_payroll_provider <- emp_payroll_provider %>% 
  filter(!is.na(clt_industry_code) & clt_industry_code != 99) %>% 
  group_by(year) %>% mutate(percent = n/sum(n)*100) %>% 
  group_by(clt_industry_code) %>% summarise(percent = mean(percent)) %>% 
  select(industry_code = clt_industry_code, percent) %>% 
  full_join(industries, by = "industry_code") %>% 
  mutate(percent = ifelse(is.na(percent), 0, percent)) %>%
  group_by(industry_name) %>% summarise(percent = sum(percent))

# merging the official data and payroll_provider data
emp <- rbind(emp_CES %>% mutate(source = "CES"), emp_payroll_provider %>% mutate(source = "payroll_provider"))

# extract the (sorted) industry names
industry_levels <- emp_CES %>% arrange(percent) %>% .$industry_name
emp$industry_name <- factor(emp$industry_name, levels = industry_levels)

# create text_pos and percent_text which will be useful for the plot
emp <- emp %>% mutate(industry_num = as.numeric(industry_name), 
                      text_pos = case_when(percent < 1.0 ~ percent + 1.2, 
                                           percent/2 < 1 ~ 1,
                                           T ~ percent/2),
                      percent_text = round(percent, 2))


## plotting the industry distributions
pdf(paste0(project_dir,"/figures_tables/industry_decomposition.pdf"), width = 12, height = 6)
emp %>% 
  ggplot (aes(x = industry_num,
              y = percent,
              fill = source)) +
  scale_x_continuous(breaks = c(1:length(industry_levels)), labels = industry_levels)+
  scale_y_continuous(limits = c(0,17), breaks = c(0,5,10,15))+
  theme_bw() + 
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank()) +
  geom_bar(stat="identity", position = "dodge", width = 0.7) + 
  labs(y = "Percent",x = "Industry", fill = "Source") +
  theme(text = element_text(size=12),
        plot.title = element_text(vjust = 0.5, hjust = 0.5,
                                  margin = margin(0.1,0,0.5,0,"cm"), size = rel(1)),
        axis.title.x = element_text(margin = margin(0.2, 0,0,0,unit = "cm"), size = rel(1)),
        axis.text.x  = element_text(angle=45, hjust=1, size = rel(0.8),
                                    color = "black", lineheight = 1, 
                                    margin = margin(0.2, 0,0.5,0,unit = "cm")),
        axis.text.y  = element_text(angle=0, vjust=0.5, hjust=0.5,
                                    color = "black", margin = margin(0, 0.2,0,0,unit = "cm")),
        legend.position = "top",
        legend.direction = "horizontal",
        legend.margin = margin(.2,0.7,0,0,"cm"),
        legend.text = element_text(size = rel(1), margin = margin(0, 0.5,0,0,unit = "cm")),
        legend.title = element_text(size = rel(1), margin = margin(0, 0.5,0,0,unit = "cm")),
        legend.key.height = unit(0.7, "cm")) +
  scale_fill_manual(values = colors) +
  guides(fill = guide_legend(keywidth = .8, keyheight = .8)) +
  geom_text(data = emp %>% filter(source == "CES"),
            aes(x = industry_num - 0.18, y = text_pos, label = paste0(percent_text,"%")),
            size=3.3,angle = 90) +
  geom_text(data = emp %>% filter(source == "payroll_provider"),
            aes(x = industry_num + 0.16, y = text_pos, label = paste0(percent_text,"%")),
            size=3.3, angle = 90)
dev.off()


#### Hourly vs. salaried employment breakdown per year from 2013-2023 #########

cps <- read_rds(paste0(main_dir, "data/official_statistics/cps/cps_basic.rds"))

cps_hourly <- cps %>%
  filter(peernhry > 0) %>% 
  group_by(year,peernhry) %>%
  summarise(n = n()) %>% 
  mutate(percent = n/sum(n)*100)

cps_hourly_weighted <-  cps %>%
  filter(peernhry > 0) %>%
  group_by(year,peernhry) %>%
  summarise(n = sum(pwsswgt/1000000)) %>%
  mutate(percent = n/sum(n)*100)

##### reading CPS monthly micro data and finding the worker decomposition and saving the relevant info

stat_cps <- cps_hourly %>% 
  mutate(worker_type = recode(peernhry,"NONHOURLY WORKER"="salary", "HOURLY WORKER"="hourly")) %>%
  mutate(worker_type=factor(worker_type, levels = c("hourly","salary"))) %>%
  group_by(year, worker_type) %>%
  summarise(percent = mean(percent)) %>% 
  mutate(type = "CPS", year = as.numeric(year))

stat_cps_weighted <- cps_hourly_weighted %>% 
  mutate(worker_type = recode(peernhry,"NONHOURLY WORKER"="salary", "HOURLY WORKER"="hourly")) %>%
  mutate(worker_type=factor(worker_type, levels = c("hourly","salary"))) %>%
  group_by(year, worker_type) %>% summarise(percent = mean(percent)) %>% 
  mutate(type = "CPS Weighted", year = as.numeric(year))

##reading the statistics obtained from payroll_provider data
stat_hourly <- readRDS(paste0(project_dir,"/data/tmp_data/stat_hourly.rds")) %>% 
  group_by(year, worker_type) %>% summarise(n = sum(n))

stat_hourly <- stat_hourly %>% 
  mutate(worker_type = ifelse(worker_type %in% c("hourly","tipped"), "hourly", "salary")) %>% 
  group_by(year, worker_type) %>% summarise_all(sum) %>% 
  group_by(year) %>% mutate(percent = n/sum(n)*100) %>% 
  mutate(year = as.numeric(year),
         worker_type = factor(worker_type, levels = c("hourly","salary")))

stat_hourly <- stat_hourly %>% mutate(type = "payroll_provider") %>% 
  select(year, type, worker_type, percent) %>% 
  rbind(stat_cps %>% select(year, type, worker_type, percent)) %>% 
  rbind(stat_cps_weighted %>% select(year, type, worker_type, percent)) %>% 
  mutate(percent_text = round(percent, 2)) %>% 
  mutate(type = factor(type, levels = c("payroll_provider", "CPS",
                                        "CPS Weighted")))

stat_hourly$worker_type <- factor(stat_hourly$worker_type, levels=c("salary","hourly"))


# Plot: comparison of share of hourly workers with CPS
pdf(paste0(project_dir, "figures_tables/hourly_vs_salary_workers_cps_comparison.pdf")
    , width = 10, height = 6)
stat_hourly %>% filter(worker_type == "hourly" & type != "CPS Weighted") %>% 
  ggplot (aes(x = year, 
              y = percent,
              fill = type)) +
  theme_bw() + 
  theme(panel.grid.minor.x = element_blank()) +
  geom_bar(stat = "identity", width = 0.7, position = "dodge") + 
  labs(y = "Percent", x = "Year", fill = "Data Source" ,
       title = "Share of Hourly Workers") +
  scale_x_continuous(breaks = c(2010:2023)) +
  theme(text = element_text(size=14),
        axis.title.y = element_text(angle = 90,vjust=0,hjust = 0.5,
                                    margin = margin(0, 0.5,0,0,unit = "cm"),
                                    face = "bold"),
        axis.title.x = element_text(margin = margin(0.5, 0,0.2,0,unit = "cm"),
                                    face = "bold"),
        axis.text.x  = element_text(angle=0,  hjust=0.5,vjust=1, color = "black",
                                    margin = margin(0.2, 0,0,0,unit = "cm"), lineheight = 1.2),
        axis.text.y  = element_text(angle=0, vjust=0.5, hjust=0.5, 
                                    color = "black", 
                                    margin = margin(0, 0.2,0,0,unit = "cm"),
                                    lineheight = 1.2),
        plot.title = element_text(hjust = 0.5, size = rel(1.1)),
        plot.margin = margin(0.2,0.2,0,0.5,"cm"),
        legend.position = "right" ) +
  scale_fill_manual(values = colors) +
  geom_text(data = stat_hourly %>% filter(worker_type == "hourly" & type == "Payroll Provider"),
            aes(x = year - 0.15, y = percent/2, label = paste0(percent_text,"%")),
            size=3, angle = 90) +
  geom_text(data = stat_hourly %>% filter(worker_type == "hourly" & type == "CPS"),
            aes(x = year +0.15, y = percent/2, label = paste0(percent_text,"%")),
            size=3, angle = 90) 
dev.off()

#### Firm-size distribution in Q1 2019 - comparing with BLS##########

size_levels <- c("1-4", "5-9", "10-19", "20-49","50-99", "100-249", 
                 "250-499", "500-999", "1000 or more")
stat_BLS <- read_xlsx("data/official_statistics/firm_size_distribution_national.xlsx",
                      sheet = "Shares")
stat_BLS <- stat_BLS %>% filter(Year == 2023)


## firm size in payroll_provider data using the number of paid contract
stat_payroll_provider <- readRDS(paste0(project_dir,"/data/tmp_data/stat_firm_size.rds")) %>% 
  group_by(size) %>% summarise(n = sum(n)) %>% 
  mutate(size_class = case_when(is.na(size) | size < 1 ~ "NA",
                                size < 5 ~ "1-4",
                                size < 10 ~ "5-9",
                                size < 20 ~ "10-19",
                                size < 50 ~ "20-49",
                                size < 100 ~ "50-99",
                                size < 250 ~ "100-249",
                                size < 500 ~ "250-499",
                                size < 1000 ~ "500-999",
                                T ~ "1000 or more") %>% 
           factor(levels = size_levels)) %>% 
  group_by(size_class) %>% summarise(n = sum(n)) %>% 
  filter(size_class != "NA") %>%
  mutate(percent = n/sum(n)*100) %>% 
  arrange(size_class)

## number of firms in the payroll_provider data, q1 of 2023
fact <- stat_payroll_provider %>% summarise(n = sum(n)) %>% .$n
write(paste("Number of firms in payroll_provider data in 2023Q1 is: ", fact), file = facts_file_path, append=TRUE)

##firm size in payroll_provider data using the client_temp raw data
client_raw <- read.csv("data/payroll_provider/Client_Temp.csv") %>% clean_names() %>% 
  filter(clt_startdate < "2023-04-01" & clt_lostdate >= "2023-01-01" &
           clt_legalcountry == "United States") %>%
  select(size = clt_employeesactive) %>% 
  mutate(size_class = case_when(is.na(size) | size < 1 ~ "NA",
                                size < 5 ~ "1-4",
                                size < 10 ~ "5-9",
                                size < 20 ~ "10-19",
                                size < 50 ~ "20-49",
                                size < 100 ~ "50-99",
                                size < 250 ~ "100-249",
                                size < 500 ~ "250-499",
                                size < 1000 ~ "500-999",
                                T ~ "1000 or more")) %>%
  group_by(size_class) %>% summarise(n = n()) %>% filter(size_class != "NA") %>% 
  mutate(percent = n/sum(n)*100, size_class = factor(size_class, levels = size_levels)) %>% 
  arrange(size_class)

## number of firms active in Q1 2023 in Client_Temp raw data 
fact <- client_raw %>% summarise(n = sum(n)) %>% .$n
write(paste("Number of clients in raw payroll_provider data in 2023Q1 is: ", fact), file = facts_file_path, append=TRUE)

## merging the data from the three sources in one table and exporting it
stat_firm_size <- matrix(0, ncol = length(size_levels) + 1, nrow = 2)
stat_firm_size[1,-1] <- as.numeric(stat_BLS[1,-1])      
stat_firm_size[2,-1] <- round(stat_payroll_provider$percent,4)
#stat_firm_size[3,-1] <- round(client_raw$percent,4)
stat_firm_size <- data.frame(stat_firm_size) %>% mutate_all(~paste0(.," %"))
colnames(stat_firm_size) <- c("source", size_levels)
stat_firm_size[,1] <- c("BLS", "Payroll Provider") #, "payroll_provider - raw client data")
write_xlsx(stat_firm_size, paste0(project_dir, "/figures_tables/firm_size_distribution_BLS.xlsx"))

colnames(stat_firm_size) <- gsub(" or more", "$+$", colnames(stat_firm_size))
colnames(stat_firm_size) <- gsub("-", " to ", colnames(stat_firm_size))

latex_table <- xtable(stat_firm_size, caption = "Distribution of Firms by Size Percentage")
align(latex_table) <- c("1", rep("c", ncol(stat_firm_size)))

print(
  latex_table, 
  file = "figures_tables/firm_size_dist.tex", 
  include.rownames = FALSE, 
  sanitize.text.function = identity, 
  caption.placement = "top", 
  booktabs = TRUE
)

print(latex_table, 
      include.rownames = FALSE, 
      santitize.text.function = identity, 
      booktabs= TRUE)



#### Hourly Workers' characteristics 2014-2019 ########################
### mean hourly wage rate ##########

wage_cps <- cps %>% 
  filter(peernhry == "HOURLY WORKER" & !is.na(prernhly)) %>%
  summarise(mean_wage = mean(prernhly), n = n()) %>%
  mutate(mean_wage = ifelse(mean_wage > 100,
                            mean_wage/100,
                            mean_wage))

wage_cps_weighted <- cps %>% 
  filter(peernhry == "HOURLY WORKER" & !is.na(prernhly) & !is.na(pwsswgt)) %>%
  summarise(mean_wage_weighted = weighted.mean(prernhly, pwsswgt/1000000), 
            mean_wage = mean(prernhly), n = n(), weight = sum(pwsswgt)) %>%
  mutate(mean_wage_weighted = ifelse(mean_wage_weighted > 100,
                                     mean_wage_weighted/100,
                                     mean_wage_weighted),
         mean_wage = ifelse(mean_wage > 100,
                            mean_wage/100,
                            mean_wage))

## merging with payroll_provider data and outputting a table
wage_payroll_provider <- readRDS(paste0(project_dir, "/data/tmp_data/stat_wage.rds")) 

wage_payroll_provider %>% summarise(n = sum(n)) ## number of hourly workers in 

stat <- data.frame(source = c("CPS", "Payroll Provider", "Payroll Provider (topcoded as CPS)"),
                   mean_wage = c(weighted.mean(wage_cps_weighted$mean_wage_weighted, wage_cps_weighted$weight),
                                 weighted.mean(wage_payroll_provider$mean_wage, wage_payroll_provider$n), 
                                 weighted.mean(wage_payroll_provider$mean_wage_cps_topcode, wage_payroll_provider$n)))
stat <- stat %>%
  mutate(mean_wage = round(mean_wage, 3))

colnames(stat) <- c("Source", "Average Hourly Wage")  
write_xlsx(stat, paste0(project_dir, "/figures_tables/average hourly wage.xlsx")) 

latex_table <- xtable(stat, caption = "Average hourly wage in the Payroll Provider data vs. the CPS")
align(latex_table) <- c("1", rep("c", ncol(stat)))

print(
  latex_table, 
  file = "figures_tables/tablee1_provider_wage.tex", 
  include.rownames = FALSE, 
  sanitize.text.function = identity, 
  caption.placement = "top", 
  booktabs = TRUE
)

print(latex_table, 
      include.rownames = FALSE, 
      santitize.text.function = identity, 
      booktabs= TRUE)


#### hourly workers by industry ###########
industries <- read_xlsx("data/official_statistics/NAICS 2digit.xlsx") %>% 
  select(industry_code, industry_name = Name) ## NAICS 2-digit industry codes

## reading CPS employment data
setwd(paste0(main_dir,"data/official_statistics/cps/industry/"))
file_list <- list.files(pattern="*.xlsx")
cps_list <- list()
for (file_path in file_list){
  stat <- read_xlsx(file_path) %>% 
    select(employment,year,naics) %>%
    rename(n=employment, industry_code=naics)
  
  cps_list <- append(cps_list, list(stat))
}
setwd(main_dir)

## merging the CPS data
stat_cps <- bind_rows(cps_list) %>% clean_names() %>% mutate_all(as.numeric) %>% 
  group_by(industry_code) %>% summarise_all(sum) %>%
  mutate(percent = n/sum(n)*100) %>% 
  full_join(industries, by = "industry_code") %>% 
  mutate(percent = ifelse(is.na(percent), 0, percent),
         n = ifelse(is.na(n), 0, n)) %>% 
  group_by(industry_name) %>% summarise(percent = sum(percent), n = sum(n))

## reading the industry statistics obtained from payroll_provider data
stat_payroll_provider <- readRDS(paste0(project_dir,"/data/tmp_data/stat_worker_industry.rds"))%>% 
  mutate_all(as.numeric) %>% mutate(clt_industry_code = ifelse(clt_industry_code == 99, NA,
                                                                  clt_industry_code)) %>% 
  mutate(percent = n/sum(n)*100) %>% 
  select(industry_code = clt_industry_code, percent,n) %>% 
  full_join(industries, by = "industry_code") %>% 
  mutate(percent = ifelse(is.na(percent), 0, percent),
         n = ifelse(is.na(n), 0, n)) %>% 
  group_by(industry_name) %>% summarise(percent = sum(percent), n = sum(n))

stat_payroll_provider2 <- readRDS(paste0(project_dir,"/data/tmp_data/stat_worker_industry.rds")) %>% 
  mutate_all(as.numeric) %>% mutate(clt_industry_code = ifelse(clt_industry_code == 99, NA,
                                                                  clt_industry_code)) %>% 
  filter(!is.na(clt_industry_code)) %>% 
  mutate(percent = n/sum(n)*100) %>% 
  select(industry_code = clt_industry_code, percent,n) %>% 
  full_join(industries, by = "industry_code") %>% 
  mutate(percent = ifelse(is.na(percent), 0, percent),
         n = ifelse(is.na(n), 0, n)) %>% 
  group_by(industry_name) %>% summarise(percent = sum(percent), n = sum(n))

## merging the official data and payroll_provider data
stat <- rbind(stat_cps %>% mutate(source = "CPS"),
              stat_payroll_provider2 %>% mutate(source = "Payroll Provider"),
              stat_payroll_provider %>% mutate(source = "payroll_provider2"))
industry_levels <- stat_cps %>% arrange(percent) %>% .$industry_name
stat$industry_name <- factor(stat$industry_name, levels = industry_levels)
industry_levels[1] <- "Management of \n Companies and Enterprises"
industry_levels[15] <- "Administrative and Support and Waste\n Management and Remediation Services"
levels(stat$industry_name) <- industry_levels
stat <- stat %>% mutate(industry_num = as.numeric(industry_name), 
                        text_pos = case_when(percent < 1.0 ~ percent + 1.2, 
                                             percent/2 < 1 ~ 1,
                                             T ~ percent/2),
                        percent_text = round(percent, 2))


## plotting the industry distributions
pdf(paste0(project_dir, "figures_tables/fige2_industry decomposition_hourly workers_2014_2023.pdf")
    , width = 12, height = 6)
stat %>% filter(source != "payroll_provider2") %>% 
  ggplot (aes(x = industry_num,
              y = percent,
              fill = source)) +
  scale_x_continuous(breaks = c(1:length(industry_levels)), labels = industry_levels)+
  scale_y_continuous(limits = c(0,18), breaks = c(0,5,10,15))+
  theme_bw() + 
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank()) +
  geom_bar(stat="identity", position = "dodge", width = 0.7) + 
  labs(y = "Percent",x = "Industry", fill = "Source") +
  theme(text = element_text(size=12),
        plot.title = element_text(vjust = 0.5, hjust = 0.5,
                                  margin = margin(0.1,0,0.5,0,"cm"), size = rel(1)),
        axis.title.x = element_text(margin = margin(0.2, 0,0,0,unit = "cm"), size = rel(1)),
        axis.text.x  = element_text(angle=45, hjust=1, size = rel(0.8),
                                    color = "black", lineheight = 1, 
                                    margin = margin(0.2, 0,0.5,0,unit = "cm")),
        axis.text.y  = element_text(angle=0, vjust=0.5, hjust=0.5,
                                    color = "black", margin = margin(0, 0.2,0,0,unit = "cm")),
        legend.position = "top",
        legend.direction = "horizontal",
        legend.margin = margin(.2,0.7,0,0,"cm"),
        legend.text = element_text(size = rel(1), margin = margin(0, 0.5,0,0,unit = "cm")),
        legend.title = element_text(size = rel(1), margin = margin(0, 0.5,0,0,unit = "cm")),
        legend.key.height = unit(0.7, "cm")) +
  scale_fill_manual(values = colors) +
  guides(fill = guide_legend(keywidth = .8, keyheight = .8)) +
  geom_text(data = stat %>% filter(source == "CPS"),
            aes(x = industry_num - 0.18, y = text_pos, label = paste0(percent_text,"%")),
            size=3.3,angle = 90) +
  geom_text(data = stat %>% filter(source == "Payroll Provider"),
            aes(x = industry_num + 0.16, y = text_pos, label = paste0(percent_text,"%")),
            size=3.3, angle = 90)
dev.off()


stat_table <- full_join(stat %>% filter(source == "payroll_provider2") %>% arrange(industry_name) %>% 
                          mutate( payroll_provider = paste0(percent_text, "%")) %>% 
                          select(industry_name, payroll_provider),
                        stat %>% filter(source == "payroll_provider") %>% arrange(industry_name) %>% 
                          mutate( payroll_provider2 = paste0(percent_text, "%")) %>% 
                          select(industry_name, payroll_provider2)) %>% 
  full_join(stat %>% filter(source == "CPS") %>% arrange(industry_name) %>% 
              mutate( CPS = paste0(percent_text, "%")) %>% 
              select(industry_name, CPS))

stat_table$industry_name <- as.character(stat_table$industry_name)
stat_table$industry_name[is.na(stat_table$industry_name)] <- "Not Specified"
stat_table <- rbind(stat_table,
                    data.frame(industry_name = "N",
                               payroll_provider = stat %>% filter(source == "payroll_provider2") %>%
                                 .$n %>% sum(na.rm = F),
                               payroll_provider2 = stat %>% filter(source == "payroll_provider") %>%
                                 .$n %>% sum(na.rm = F),
                               CPS = stat %>% filter(source == "CPS") %>%
                                 .$n %>% sum(na.rm = F)))

colnames(stat_table) <- c("Industry", "payroll_provider", "payroll_provider \n(Industry available)", "CPS")
write_xlsx(stat_table, paste0(project_dir, "/figures_tables/hourly workers by industry.xlsx")) 


### latex
stat <- read_xlsx( paste0(project_dir, "/figures_tables/hourly workers by industry.xlsx"))
for (i in 1:21) stat[i,-1] <- as.list(ifelse(is.na(stat[i,-1]), "",
                                             str_sub(stat[i,-1], end = -3) %>% paste0("\\%")))
stat[22,-1] <- as.list(gsub('(?=(?:.{3})+$)', ",", stat[22,-1], perl = TRUE))
stat[22,-1] <- as.list(ifelse(str_detect(stat[22,-1],"^,"),
                              str_sub(stat[22,-1], start = 2),
                              stat[22,-1]))
write.table(stat, sep = "&",
            paste0(project_dir,"/figures_tables/hourly workers by industry_latex.txt"))

x <-  read_file(paste0(project_dir,"/figures_tables/hourly workers by industry_latex.txt"))
x <- str_replace_all(x, '\\"', " ")
x <- str_replace_all(x, '\n [:digit:]{1} \\&', "\n ")

write_file(x, append = F, paste0(project_dir,
                                 "/figures_tables/hourly workers by industry_latex.txt"))



rm(list=ls()) 
gc() #free up memory used by R
#########################################################